import pandas as pd
import os

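# Cross-folder deduplication: delete every row of one folder's CSV files whose company name also appears in the other folder's CSV files.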

# Collect every company name found in the folder's CSV files
def get_company_names_from_folder(folder_path):
    """Collect the distinct company names found in a folder's CSV files.

    Every directory entry whose name ends with ``.csv`` is read with pandas.
    Files without a ``Company_Name`` column, and files with no content at
    all, contribute nothing.

    Args:
        folder_path: Directory to scan (sub-directories are not descended).

    Returns:
        A set of the non-missing ``Company_Name`` values across all files.
    """
    company_names = set()
    for filename in os.listdir(folder_path):
        if not filename.endswith('.csv'):
            continue
        file_path = os.path.join(folder_path, filename)
        try:
            df = pd.read_csv(file_path)
        except pd.errors.EmptyDataError:
            # A file with no header and no data has no names to offer.
            continue
        if 'Company_Name' in df.columns:
            # Drop missing values: NaN is not a company name, and pandas'
            # isin() treats NaN as matching NaN, so keeping it in the set
            # would make every blank-name row look like a duplicate.
            company_names.update(df['Company_Name'].dropna().tolist())
    return company_names


# Remove the given company names, together with their entire rows, from the folder's CSV files
def remove_companies_from_files(folder_path, companies_to_remove):
    """Delete rows naming any of the given companies from a folder's CSV files.

    Every directory entry whose name ends with ``.csv`` and that has a
    ``Company_Name`` column is filtered in place, and a one-line summary is
    printed for it. Files with no content at all are skipped.

    Args:
        folder_path: Directory whose CSV files are filtered (not recursive).
        companies_to_remove: Collection of company names; any row whose
            ``Company_Name`` is in it is dropped.
    """
    for filename in os.listdir(folder_path):
        if not filename.endswith('.csv'):
            continue
        file_path = os.path.join(folder_path, filename)
        try:
            df = pd.read_csv(file_path)
        except pd.errors.EmptyDataError:
            # Nothing to filter in a file with no header and no data; do not
            # let it abort the run and leave the folder half-processed.
            continue
        if 'Company_Name' not in df.columns:
            continue
        df_filtered = df[~df['Company_Name'].isin(companies_to_remove)]
        removed_count = len(df) - len(df_filtered)
        # Only rewrite files that actually lost rows: a pandas read/write
        # round trip is not byte-preserving (e.g. "007" is inferred as the
        # integer 7), so untouched files must be left exactly as they were.
        if removed_count:
            df_filtered.to_csv(file_path, index=False)
        print(f"Processed {filename}: Removed {removed_count} rows.")


def main(folder1_path='folder1', folder2_path='folder2'):
    """Strip from one folder's CSV files the companies listed in another's.

    Args:
        folder1_path: Directory whose CSV files are filtered in place.
            Defaults to ``'folder1'``.
        folder2_path: Directory whose CSV files supply the company names to
            remove. Defaults to ``'folder2'``.
    """
    # Gather every company name present in folder2.
    companies_in_folder2 = get_company_names_from_folder(folder2_path)

    # Drop from folder1 each row whose company name also exists in folder2.
    remove_companies_from_files(folder1_path, companies_in_folder2)


# Run the deduplication only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
